plus sn10x, SI

Baf53cre ENS neurons, SI data
Steady state, CTL x3 CKO(Ahr-cko) x3

loading 60k cells,
cellranger called 24,365 cells (demo); 24,604 cells (plus)

load dependancies

load 10x data

filt.10x <- Read10X(data.dir = "../output_plus/filtered_feature_bc_matrix/")
## 10X data contains more than one type and is being returned as a list containing matrices of each type.
GEX <- filt.10x$`Gene Expression`
FB <- filt.10x$`Antibody Capture`

check datasets

dim(GEX)
## [1] 32285 24604
GEX[1:6,1:6]
## 6 x 6 sparse Matrix of class "dgCMatrix"
##         AAACCCAAGAATACAC-1 AAACCCAAGCAATAGT-1 AAACCCAAGCGCTGCT-1
## Xkr4                     1                  2                  1
## Gm1992                   .                  1                  .
## Gm19938                  .                  .                  .
## Gm37381                  .                  .                  .
## Rp1                      .                  .                  .
## Sox17                    .                  .                  .
##         AAACCCAAGGTGAGCT-1 AAACCCAAGTCTAACC-1 AAACCCAAGTGGACGT-1
## Xkr4                     .                  .                  .
## Gm1992                   .                  .                  .
## Gm19938                  .                  1                  .
## Gm37381                  .                  .                  .
## Rp1                      .                  .                  .
## Sox17                    .                  .                  .
dim(FB)
## [1]     6 24604
FB[,1:6]
## 6 x 6 sparse Matrix of class "dgCMatrix"
##       AAACCCAAGAATACAC-1 AAACCCAAGCAATAGT-1 AAACCCAAGCGCTGCT-1
## CTL.1                 27                 33                 80
## CTL.2                  9                  4                 20
## CTL.3                 54                  9                 37
## CKO.1                 23                 15                 43
## CKO.2                 28                 30                552
## CKO.3                 62                194                165
##       AAACCCAAGGTGAGCT-1 AAACCCAAGTCTAACC-1 AAACCCAAGTGGACGT-1
## CTL.1                249                398                112
## CTL.2                  6                  7                  7
## CTL.3                 18                 54                 21
## CKO.1                 32                 39                 53
## CKO.2                 57                 58                 51
## CKO.3                 99                 90                906
rowSums(FB)
##   CTL.1   CTL.2   CTL.3   CKO.1   CKO.2   CKO.3 
## 3517685  380630  900857 1607280 3000253 4337461
rowSums(FB>0)
## CTL.1 CTL.2 CTL.3 CKO.1 CKO.2 CKO.3 
## 24603 24263 24583 24569 24602 24604

FB

# build seurat object
FB.seur <- CreateSeuratObject(counts = FB,project = "Steady_SI")

FB.seur
## An object of class Seurat 
## 6 features across 24604 samples within 1 assay 
## Active assay: RNA (6 features, 0 variable features)
rownames(FB.seur)
## [1] "CTL.1" "CTL.2" "CTL.3" "CKO.1" "CKO.2" "CKO.3"

normalization

perform Seurat ’Demultiplexing with hashtag oligos(HTOs)
ref https://satijalab.org/seurat/articles/hashing_vignette.html

# normalize
FB.seur <- NormalizeData(FB.seur)
FB.seur <- FindVariableFeatures(FB.seur, selection.method = "mean.var.plot")
FB.seur <- ScaleData(FB.seur, features = VariableFeatures(FB.seur))
## Centering and scaling data matrix
# independent assay for HTO data  
FB.seur[['HTO']] <- CreateAssayObject(counts = FB)
FB.seur <- NormalizeData(FB.seur, assay = 'HTO', normalization.method = 'CLR')
## Normalizing across features

check demux

first define FB colors based on conditions

color.FB <- ggsci::pal_d3("category20c")(20)[c(1,6,11,#16,
                                               2,7,12#,17
                                              
                                               )]


level.FB <- c(rownames(FB.seur))

color.FBraw <-  c("#FF6C91","lightgrey",color.FB)
scales::show_col(color.FBraw, ncol = 5)

scales::show_col(color.FB, ncol = 3)

par(mfrow=c(2,3))

#plot(sort(rowSums(t(FB))),
#     xlab="reordered cells", 
#     ylab="UMI counts", log="xy",
#     main="sum")


plot(sort(t(FB)[,1]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[1])
plot(sort(t(FB)[,2]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[2])
plot(sort(t(FB)[,3]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[3])

plot(sort(t(FB)[,4]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[4])
plot(sort(t(FB)[,5]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[5])
plot(sort(t(FB)[,6]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[6])

range ‘q’

q0.50 ~ 0.99
table.demux
##    quantile Doublet CTL.1 CTL.2 CTL.3 CKO.1 CKO.2 CKO.3 Negative
## 1      0.50   21567   482   374   499   523   548   402      209
## 2      0.51   21385   516   404   543   542   573   422      219
## 3      0.52   21342   517   409   547   549   585   430      225
## 4      0.53   21050   578   442   565   623   642   470      234
## 5      0.54   20898   601   473   594   628   676   494      240
## 6      0.55   20784   629   495   616   648   683   506      243
## 7      0.56   20683   651   472   631   671   707   519      270
## 8      0.57   20457   676   518   679   698   752   542      282
## 9      0.58   20193   718   558   685   760   812   584      294
## 10     0.59   20097   743   581   704   780   811   589      299
## 11     0.60   19922   776   616   735   785   855   610      305
## 12     0.61   19780   797   644   778   813   858   625      309
## 13     0.62   19751   799   648   786   818   863   626      313
## 14     0.63   19178   923   701   852   906   972   702      370
## 15     0.64   19038   948   724   885   930   981   723      375
## 16     0.65   19012   947   727   890   937   989   725      377
## 17     0.66   18705  1002   780   962   966  1033   759      397
## 18     0.67   18623  1017   793   983   989  1031   767      401
## 19     0.68   18210  1102   859  1017  1040  1115   839      422
## 20     0.69   17991  1133   839  1068  1086  1139   872      476
## 21     0.70   17853  1147   859  1098  1117  1148   897      485
## 22     0.71   17678  1176   887  1142  1124  1185   913      499
## 23     0.72   17361  1227   942  1159  1199  1240   957      519
## 24     0.73   17071  1289   992  1221  1217  1289   988      537
## 25     0.74   16962  1308  1005  1247  1247  1289   992      554
## 26     0.75   16516  1380  1022  1290  1327  1387  1068      614
## 27     0.76   16404  1392  1040  1315  1350  1391  1084      628
## 28     0.77   16130  1440  1093  1371  1364  1436  1119      651
## 29     0.78   15817  1509  1155  1390  1420  1491  1153      669
## 30     0.79   15510  1572  1145  1465  1451  1536  1195      730
## 31     0.80   15289  1607  1169  1514  1478  1575  1229      743
## 32     0.81   15022  1649  1206  1526  1535  1615  1278      773
## 33     0.82   14781  1680  1247  1580  1562  1649  1312      793
## 34     0.83   14511  1727  1232  1648  1590  1687  1329      880
## 35     0.84   14211  1778  1269  1665  1660  1727  1374      920
## 36     0.85   14030  1808  1295  1709  1675  1747  1399      941
## 37     0.86   13536  1888  1327  1776  1751  1816  1476     1034
## 38     0.87   13340  1907  1355  1822  1768  1849  1497     1066
## 39     0.88   12979  1982  1417  1853  1817  1899  1555     1102
## 40     0.89   12733  2018  1375  1917  1833  1937  1577     1214
## 41     0.90   12345  2087  1438  1957  1894  1994  1616     1273
## 42     0.91   12075  2134  1433  2013  1915  2025  1642     1367
## 43     0.92   11740  2172  1479  2042  1973  2083  1673     1442
## 44     0.93   11229  2257  1500  2108  2037  2158  1721     1594
## 45     0.94   10881  2299  1556  2141  2092  2216  1727     1692
## 46     0.95   10447  2359  1552  2191  2124  2296  1762     1873
## 47     0.96    9919  2435  1545  2268  2165  2370  1783     2119
## 48     0.97    9314  2526  1558  2319  2223  2464  1775     2425
## 49     0.98    8517  2648  1537  2358  2298  2583  1737     2926
## 50     0.99    7418  2738  1469  2460  2362  2715  1659     3783
#color.FB <- scales::hue_pal()(10)
par(mfrow=c(3,3))


plot(table.demux$Doublet, type="p", col="#FF6C91", pch=16, ylab="Doublet")
plot(table.demux$Negative, type="p", col="lightgrey", pch=16, ylab="Negative")
plot(rowSums(table.demux[,grep("CTL|CKO",colnames(table.demux))]), type="p", col="#306000", pch=16, ylab="Singlet")

#plot(table.demux$quantile, pch=16, ylab="mod-quantile")
#plot.new()

plot(table.demux[,2+1], type="p", col=color.FB[1], pch=16, ylab=colnames(table.demux)[2+1])
plot(table.demux[,2+2], type="p", col=color.FB[2], pch=16, ylab=colnames(table.demux)[2+2])
plot(table.demux[,2+3], type="p", col=color.FB[3], pch=16, ylab=colnames(table.demux)[2+3])
plot(table.demux[,2+4], type="p", col=color.FB[4], pch=16, ylab=colnames(table.demux)[2+4])
plot(table.demux[,2+5], type="p", col=color.FB[5], pch=16, ylab=colnames(table.demux)[2+5])
plot(table.demux[,2+6], type="p", col=color.FB[6], pch=16, ylab=colnames(table.demux)[2+6])

q0.9900 ~ 0.9999
#color.FB <- scales::hue_pal()(10)
par(mfrow=c(3,3))


plot(table.demux.s$Doublet, type="p", col="#FF6C91", pch=16, ylab="Doublet")
plot(table.demux.s$Negative, type="p", col="lightgrey", pch=16, ylab="Negative")
plot(rowSums(table.demux.s[,grep("CTL|CKO",colnames(table.demux.s))]), type="p", col="#306000", pch=16, ylab="Singlet")

#plot(table.demux.s$quantile, pch=16, ylab="mod-quantile")
#plot.new()

plot(table.demux.s[,2+1], type="p", col=color.FB[1], pch=16, ylab=colnames(table.demux.s)[2+1])
plot(table.demux.s[,2+2], type="p", col=color.FB[2], pch=16, ylab=colnames(table.demux.s)[2+2])
plot(table.demux.s[,2+3], type="p", col=color.FB[3], pch=16, ylab=colnames(table.demux.s)[2+3])
plot(table.demux.s[,2+4], type="p", col=color.FB[4], pch=16, ylab=colnames(table.demux.s)[2+4])
plot(table.demux.s[,2+5], type="p", col=color.FB[5], pch=16, ylab=colnames(table.demux.s)[2+5])
plot(table.demux.s[,2+6], type="p", col=color.FB[6], pch=16, ylab=colnames(table.demux.s)[2+6])

#plot(table.demux.s[,2+7], type="p", col=color.FB[7], pch=16, ylab=colnames(table.demux.s)[2+7])
#plot(table.demux.s[,2+8], type="p", col=color.FB[8], pch=16, ylab=colnames(table.demux.s)[2+8])

demultiplexing

# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.92)
## Cutoff for CTL.1 : 80 reads
## Cutoff for CTL.2 : 16 reads
## Cutoff for CTL.3 : 24 reads
## Cutoff for CKO.1 : 38 reads
## Cutoff for CKO.2 : 64 reads
## Cutoff for CKO.3 : 145 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##    11740     1442    11422
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##  Doublet Negative    CTL.1    CTL.2    CTL.3    CKO.1    CKO.2    CKO.3 
##    11740     1442     2172     1479     2042     1973     2083     1673
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.93)
## Cutoff for CTL.1 : 83 reads
## Cutoff for CTL.2 : 17 reads
## Cutoff for CTL.3 : 25 reads
## Cutoff for CKO.1 : 40 reads
## Cutoff for CKO.2 : 66 reads
## Cutoff for CKO.3 : 151 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##    11229     1594    11781
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##  Doublet Negative    CTL.1    CTL.2    CTL.3    CKO.1    CKO.2    CKO.3 
##    11229     1594     2257     1500     2108     2037     2158     1721
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.94)
## Cutoff for CTL.1 : 86 reads
## Cutoff for CTL.2 : 17 reads
## Cutoff for CTL.3 : 26 reads
## Cutoff for CKO.1 : 41 reads
## Cutoff for CKO.2 : 69 reads
## Cutoff for CKO.3 : 157 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##    10881     1692    12031
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##  Doublet Negative    CTL.1    CTL.2    CTL.3    CKO.1    CKO.2    CKO.3 
##    10881     1692     2299     1556     2141     2092     2216     1727
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.95)
## Cutoff for CTL.1 : 90 reads
## Cutoff for CTL.2 : 18 reads
## Cutoff for CTL.3 : 27 reads
## Cutoff for CKO.1 : 43 reads
## Cutoff for CKO.2 : 72 reads
## Cutoff for CKO.3 : 165 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##    10447     1873    12284
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##  Doublet Negative    CTL.1    CTL.2    CTL.3    CKO.1    CKO.2    CKO.3 
##    10447     1873     2359     1552     2191     2124     2296     1762
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.96)
## Cutoff for CTL.1 : 95 reads
## Cutoff for CTL.2 : 19 reads
## Cutoff for CTL.3 : 28 reads
## Cutoff for CKO.1 : 46 reads
## Cutoff for CKO.2 : 76 reads
## Cutoff for CKO.3 : 175 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##     9919     2119    12566
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##  Doublet Negative    CTL.1    CTL.2    CTL.3    CKO.1    CKO.2    CKO.3 
##     9919     2119     2435     1545     2268     2165     2370     1783
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.97)
## Cutoff for CTL.1 : 101 reads
## Cutoff for CTL.2 : 20 reads
## Cutoff for CTL.3 : 30 reads
## Cutoff for CKO.1 : 49 reads
## Cutoff for CKO.2 : 81 reads
## Cutoff for CKO.3 : 187 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##     9314     2425    12865
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##  Doublet Negative    CTL.1    CTL.2    CTL.3    CKO.1    CKO.2    CKO.3 
##     9314     2425     2526     1558     2319     2223     2464     1775
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.98)
## Cutoff for CTL.1 : 110 reads
## Cutoff for CTL.2 : 22 reads
## Cutoff for CTL.3 : 33 reads
## Cutoff for CKO.1 : 53 reads
## Cutoff for CKO.2 : 88 reads
## Cutoff for CKO.3 : 204 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##     8517     2926    13161
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##  Doublet Negative    CTL.1    CTL.2    CTL.3    CKO.1    CKO.2    CKO.3 
##     8517     2926     2648     1537     2358     2298     2583     1737
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.99)
## Cutoff for CTL.1 : 124 reads
## Cutoff for CTL.2 : 25 reads
## Cutoff for CTL.3 : 36 reads
## Cutoff for CKO.1 : 61 reads
## Cutoff for CKO.2 : 99 reads
## Cutoff for CKO.3 : 232 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##     7418     3783    13403
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##  Doublet Negative    CTL.1    CTL.2    CTL.3    CKO.1    CKO.2    CKO.3 
##     7418     3783     2738     1469     2460     2362     2715     1659
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.992)
## Cutoff for CTL.1 : 129 reads
## Cutoff for CTL.2 : 26 reads
## Cutoff for CTL.3 : 38 reads
## Cutoff for CKO.1 : 63 reads
## Cutoff for CKO.2 : 103 reads
## Cutoff for CKO.3 : 241 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##     7026     4080    13498
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##  Doublet Negative    CTL.1    CTL.2    CTL.3    CKO.1    CKO.2    CKO.3 
##     7026     4080     2765     1445     2463     2408     2767     1650
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.994)
## Cutoff for CTL.1 : 135 reads
## Cutoff for CTL.2 : 27 reads
## Cutoff for CTL.3 : 39 reads
## Cutoff for CKO.1 : 66 reads
## Cutoff for CKO.2 : 108 reads
## Cutoff for CKO.3 : 253 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##     6624     4402    13578
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##  Doublet Negative    CTL.1    CTL.2    CTL.3    CKO.1    CKO.2    CKO.3 
##     6624     4402     2776     1446     2502     2412     2823     1619
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.996)
## Cutoff for CTL.1 : 143 reads
## Cutoff for CTL.2 : 28 reads
## Cutoff for CTL.3 : 41 reads
## Cutoff for CKO.1 : 70 reads
## Cutoff for CKO.2 : 114 reads
## Cutoff for CKO.3 : 269 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##     6072     4807    13725
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##  Doublet Negative    CTL.1    CTL.2    CTL.3    CKO.1    CKO.2    CKO.3 
##     6072     4807     2814     1452     2544     2441     2893     1581
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.998)
## Cutoff for CTL.1 : 157 reads
## Cutoff for CTL.2 : 31 reads
## Cutoff for CTL.3 : 45 reads
## Cutoff for CKO.1 : 77 reads
## Cutoff for CKO.2 : 125 reads
## Cutoff for CKO.3 : 297 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##     5185     5676    13743
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##  Doublet Negative    CTL.1    CTL.2    CTL.3    CKO.1    CKO.2    CKO.3 
##     5185     5676     2826     1365     2557     2491     2984     1520
# tags q0.99
#cutoff.FB <- c(124,25,36,
#               61,99,232)
#
# manual ref.
# 
# CTL.1  q0.99
# CTL.2  q0.95
# CTL.3  q0.99
# CKO.1  q0.99
# CKO.2  q0.996
# CKO.3  q0.96
#
cutoff.FB <- c(124,18,36,
               61,114,180)
names(cutoff.FB) <- rownames(FB.seur)
cutoff.FB
## CTL.1 CTL.2 CTL.3 CKO.1 CKO.2 CKO.3 
##   124    18    36    61   114   180
data.frame(cutoff.FB=cutoff.FB)
##       cutoff.FB
## CTL.1       124
## CTL.2        18
## CTL.3        36
## CKO.1        61
## CKO.2       114
## CKO.3       180
par(mfrow=c(2,3))

#plot(sort(rowSums(t(FB))),
#     xlab="reordered cells", 
#     ylab="UMI counts", log="xy",
#     main="sum")


plot(sort(t(FB)[,1]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[1])
abline(h=cutoff.FB[1],col = color.FB[1])
plot(sort(t(FB)[,2]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[2])
abline(h=cutoff.FB[2],col = color.FB[2])
plot(sort(t(FB)[,3]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[3])
abline(h=cutoff.FB[3],col = color.FB[3])

plot(sort(t(FB)[,4]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[4])
abline(h=cutoff.FB[4],col = color.FB[4])
plot(sort(t(FB)[,5]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[5])
abline(h=cutoff.FB[5],col = color.FB[5])
plot(sort(t(FB)[,6]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[6])
abline(h=cutoff.FB[6],col = color.FB[6])

par(mfrow=c(2,3))

#plot(sort(rowSums(t(FB))),
#     xlab="reordered cells", 
#     ylab="UMI counts", log="xy",
#     main="sum")


plot(sort(t(FB)[,1],decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[1])
abline(h=cutoff.FB[1],col = color.FB[1])
plot(sort(t(FB)[,2],decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[2])
abline(h=cutoff.FB[2],col = color.FB[2])
plot(sort(t(FB)[,3],decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[3])
abline(h=cutoff.FB[3],col = color.FB[3])

plot(sort(t(FB)[,4],decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[4])
abline(h=cutoff.FB[4],col = color.FB[4])
plot(sort(t(FB)[,5],decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[5])
abline(h=cutoff.FB[5],col = color.FB[5])
plot(sort(t(FB)[,6],decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[6])
abline(h=cutoff.FB[6],col = color.FB[6])

## custom cutoff run this
custom.Demux <- function(FBobj,FBcutoff){
  # define decision matrix
  dd <- FBobj@assays[['HTO']]@counts
  dd <- apply(dd, 2, function(x){
    x > FBcutoff
  })
  x1 <- colSums(dd)
  x1[x1>1] <- "Doublet"
  x1[x1==0] <- "Negative"
  x1[x1==1] <- "Singlet"

  x2 <- x1
  
  bc.slt  <- names(x1)[x1=="Singlet"]
  
  for(bc in bc.slt){
    x2[bc] <- rownames(dd)[dd[,bc]]
  }
  
  FBdf <- data.frame(HTO_classification.global=factor(x1,
                                                      levels = c("Doublet","Negative","Singlet")),
                     hash.ID=factor(x2,
                                    levels = c("Doublet","Negative",rownames(dd))))
  return(FBdf)
}

df.FB <- custom.Demux(FB.seur,cutoff.FB)
## custom cutoff run this
dim(df.FB)
## [1] 24604     2
df.FB[1:10,]
##                    HTO_classification.global hash.ID
## AAACCCAAGAATACAC-1                   Singlet   CTL.3
## AAACCCAAGCAATAGT-1                   Singlet   CKO.3
## AAACCCAAGCGCTGCT-1                   Doublet Doublet
## AAACCCAAGGTGAGCT-1                   Singlet   CTL.1
## AAACCCAAGTCTAACC-1                   Doublet Doublet
## AAACCCAAGTGGACGT-1                   Singlet   CKO.3
## AAACCCAAGTTGAAGT-1                   Doublet Doublet
## AAACCCACAACGAGGT-1                   Singlet   CTL.1
## AAACCCACAAGAGTAT-1                   Singlet   CKO.3
## AAACCCACAAGATGGC-1                   Doublet Doublet
## custom cutoff run this
table(df.FB)
##                          hash.ID
## HTO_classification.global Doublet Negative CTL.1 CTL.2 CTL.3 CKO.1 CKO.2 CKO.3
##                  Doublet     8028        0     0     0     0     0     0     0
##                  Negative       0     2857     0     0     0     0     0     0
##                  Singlet        0        0  2564  1922  2370  2273  2540  2050
## custom cutoff run this
FB.seur@meta.data[,c("HTO_classification.global","hash.ID")] <- df.FB
FB.seur@meta.data[1:4,]
##                    orig.ident nCount_RNA nFeature_RNA nCount_HTO nFeature_HTO
## AAACCCAAGAATACAC-1  Steady_SI        203            6        203            6
## AAACCCAAGCAATAGT-1  Steady_SI        285            6        285            6
## AAACCCAAGCGCTGCT-1  Steady_SI        897            6        897            6
## AAACCCAAGGTGAGCT-1  Steady_SI        461            6        461            6
##                    HTO_maxID HTO_secondID HTO_margin HTO_classification
## AAACCCAAGAATACAC-1     CTL.3        CTL.2  0.5683494              CTL.3
## AAACCCAAGCAATAGT-1     CKO.3        CKO.2  0.6510357           Negative
## AAACCCAAGCGCTGCT-1     CKO.2        CTL.2  1.2335365              CKO.2
## AAACCCAAGGTGAGCT-1     CTL.1        CKO.3  0.7715530              CTL.1
##                    HTO_classification.global hash.ID
## AAACCCAAGAATACAC-1                   Singlet   CTL.3
## AAACCCAAGCAATAGT-1                   Singlet   CKO.3
## AAACCCAAGCGCTGCT-1                   Doublet Doublet
## AAACCCAAGGTGAGCT-1                   Singlet   CTL.1
## default q0.99 run this
#FB.seur$HTO_classification.global <- factor(as.character(FB.seur$HTO_classification.global),
#                                            levels = c("Doublet","Negative","Singlet"))
#FB.seur$hash.ID <- factor(as.character(FB.seur$hash.ID),
#                          levels = c("Doublet","Negative",rownames(FB.seur)))
sl_stat <- table(FB.seur$HTO_classification.global)
barplot(sl_stat,ylim = c(0,18500),col = c("#FF6C91","lightgrey","#7CAE00"),main = "Feature Barcode statistics")
text(x=1:3*1.2-0.5,y=sl_stat+1280,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)

RidgePlot

with ridge plots, raw
FB.seur@active.ident <- factor(FB.seur$HTO_maxID,levels=rownames(FB.seur))

RidgePlot(FB.seur, assay = 'HTO', features = rownames(FB.seur[['HTO']]),same.y.lims= TRUE, ncol=3,
          cols = color.FB) 

with ridge plots, filtered
FB.seur@meta.data$hash.ID.sort <- factor(as.character(FB.seur@meta.data$hash.ID),levels = c("Doublet","Negative",levels(FB.seur@active.ident)))
RidgePlot(FB.seur, assay = 'HTO', features = rownames(FB.seur[['HTO']]), ncol=3,same.y.lims= TRUE,
          cols = c("#FF6C91","lightgrey",color.FB),group.by = "hash.ID.sort") 

tSNE for HTOs

# first, remove negative cells from th object
#FB.seur.subset <- FB.seur

# Calculate a tSNE embedding of the HTO data
#DefaultAssay(FB.seur.subset) <- "HTO"

#FB.seur.subset <- ScaleData(FB.seur.subset, features = rownames(FB.seur.subset), verbose = FALSE)
#FB.seur.subset <- RunPCA(FB.seur.subset, features = rownames(FB.seur.subset), approx = FALSE)
#FB.seur.subset <- RunTSNE(FB.seur.subset, dims = 1:6, perplexity = 400, check_duplicates = FALSE)


#saveRDS(FB.seur.subset, "FB0429.seur.subset.rds")
FB.seur.subset <- readRDS("FB0429.seur.subset.rds")
multiplot(
DimPlot(FB.seur.subset, order = rev(levels(FB.seur@active.ident)),
        cols = color.FB) + labs(title = "FB Max_ID"), 
DimPlot(FB.seur.subset, order = c("Doublet",rev(levels(FB.seur@active.ident)),"Negative"), group.by = 'hash.ID.sort', label = F,
        cols = c("lightgrey",color.FB,"#FF6C91")) + 
  labs(title = "FB Filtered_ID") + theme(plot.title = element_text(hjust = 0)) ,
cols = 1)
## 
## Attaching package: 'grid'
## The following object is masked from 'package:Biostrings':
## 
##     pattern

stat

FB.seur$HTO_classification.global <- factor(as.character(FB.seur$HTO_classification.global),
                                            levels = c("Doublet","Negative","Singlet"))
Idents(FB.seur) <- "HTO_classification.global"
VlnPlot(FB.seur, features = "nCount_RNA", pt.size = 0, log = TRUE, cols = c("#F8766D","lightgrey","#00BA38"))

VlnPlot(FB.seur, features = "nCount_RNA", pt.size = 0, log = TRUE, cols = c("#F8766D","lightgrey","#00BA38")) + geom_jitter(alpha=0.015)

table(FB.seur@meta.data$hash.ID.sort)
## 
##  Doublet Negative    CTL.1    CTL.2    CTL.3    CKO.1    CKO.2    CKO.3 
##     8028     2857     2564     1922     2370     2273     2540     2050
par(mar=c(6,4,2,3))
sl_stat <- table(FB.seur$hash.ID.sort)
barplot(sl_stat,ylim = c(0,9600),
        col = c("#FF6C91","lightgrey",color.FB),
        main = "Feature Barcode statistics",cex.names = 0.75, xaxt = "n")
axis(1,1:8*1.2-0.48 ,levels(FB.seur@meta.data$hash.ID.sort), las=3, cex.axis=0.85)
text(x=1:8*1.2-0.45,y=sl_stat+535,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)

GEX

mainly follow https://satijalab.org/seurat/articles/pbmc3k_tutorial.html

GEX.seur <- CreateSeuratObject(counts = GEX,
                               min.cells = 3,
                               min.features = 200,
                               project = "Steady_SI")
GEX.seur
## An object of class Seurat 
## 22951 features across 24604 samples within 1 assay 
## Active assay: RNA (22951 features, 0 variable features)

filtering

MT genes

grep("^mt-",rownames(GEX),value = T)
##  [1] "mt-Nd1"  "mt-Nd2"  "mt-Co1"  "mt-Co2"  "mt-Atp8" "mt-Atp6" "mt-Co3" 
##  [8] "mt-Nd3"  "mt-Nd4l" "mt-Nd4"  "mt-Nd5"  "mt-Nd6"  "mt-Cytb"
MT_gene <- grep("^mt-",rownames(GEX.seur),value = T)
GEX.seur[["percent.mt"]] <- PercentageFeatureSet(GEX.seur, features = MT_gene)

RB_gene <- grep("^Rps|^Rpl",rownames(GEX.seur),value = T)
GEX.seur[["percent.rb"]] <- PercentageFeatureSet(GEX.seur, features = RB_gene)

# Visualize QC metrics as a violin plot
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0.01)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0)

plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt") 
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") 
plotc <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb")
plota + plotb + plotc

add FB info

GEX.seur$FB.info <- FB.seur@meta.data[rownames(GEX.seur@meta.data),"hash.ID"]
#head(GEX.seur@meta.data)
table(GEX.seur$FB.info)
## 
##  Doublet Negative    CTL.1    CTL.2    CTL.3    CKO.1    CKO.2    CKO.3 
##     8028     2857     2564     1922     2370     2273     2540     2050
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = c("#FF6C91","lightgrey",color.FB),
        pt.size = 0.1, split.by = "FB.info")
## The default behaviour of split.by has changed.
## Separate violin plots are now plotted side-by-side.
## To restore the old behaviour of a single split violin,
## set split.plot = TRUE.
##       
## This message will be shown once per session.

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = c("#FF6C91","lightgrey",color.FB),
        pt.size = 0, split.by = "FB.info")

GEX.seur <- subset(GEX.seur, subset = FB.info!="Doublet" & FB.info!="Negative")
GEX.seur
## An object of class Seurat 
## 22951 features across 13719 samples within 1 assay 
## Active assay: RNA (22951 features, 0 variable features)
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0.1)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = c("#FF6C91","lightgrey",color.FB),
        pt.size = 0.1, split.by = "FB.info")

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = c("#FF6C91","lightgrey",color.FB),
        pt.size = 0, split.by = "FB.info")

plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt") 
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") 
plotc <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb") 
plota + plotb + plotc

filtering

GEX.seur <- subset(GEX.seur, subset = percent.mt < 1.5 & nFeature_RNA > 200 & nFeature_RNA < 2800 & nCount_RNA < 6000)
GEX.seur
## An object of class Seurat 
## 22951 features across 13503 samples within 1 assay 
## Active assay: RNA (22951 features, 0 variable features)

filtered obj

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0.1)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = color.FBraw,
        pt.size = 0.1, split.by = "FB.info")

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = color.FBraw,
        pt.size = 0, split.by = "FB.info")

plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt") 
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") 
plotc <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb") 
plota + plotb + plotc

par(mar=c(6,4,2,3))
sl_stat <- table(FB.seur$hash.ID.sort)
barplot(sl_stat,ylim = c(0,9600),
        col = c("#FF6C91","lightgrey",color.FB),
        main = "Feature Barcode statistics",cex.names = 0.75, xaxt = "n")
axis(1,1:8*1.2-0.48 ,levels(FB.seur@meta.data$hash.ID.sort), las=3, cex.axis=0.85)
text(x=1:8*1.2-0.45,y=sl_stat+535,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)

par(mar=c(6,4,2,3))
sl_stat <- table(GEX.seur$FB.info)
barplot(sl_stat,ylim = c(0,3200),
        col = c("#FF6C91","lightgrey",color.FB),
        main = "Feature Barcode statistics",cex.names = 0.75, xaxt = "n")
axis(1,1:8*1.2-0.48 ,levels(FB.seur@meta.data$hash.ID.sort), las=3, cex.axis=0.85)
text(x=1:8*1.2-0.45,y=sl_stat+195,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)

check cellcycle

s.genes <- Hmisc::capitalize(tolower(cc.genes.updated.2019$s.genes))
g2m.genes <- Hmisc::capitalize(tolower(cc.genes.updated.2019$g2m.genes))

GEX.seur <- CellCycleScoring(GEX.seur,
                             s.features = s.genes,
                             g2m.features = g2m.genes)
## Warning: The following features are not present in the object: E2f8, not
## searching for symbol synonyms
## Warning: The following features are not present in the object: Cdk1, Nek2, not
## searching for symbol synonyms
VlnPlot(GEX.seur, features = c("S.Score", "G2M.Score"), group.by = "FB.info", 
    ncol = 2, pt.size = 0.1)

GEX.seur.cc <- GEX.seur

GEX.seur.cc <- NormalizeData(GEX.seur.cc)
GEX.seur.cc <- FindVariableFeatures(GEX.seur.cc)
GEX.seur.cc <- ScaleData(GEX.seur.cc)
Idents(GEX.seur.cc) <- "Phase"
RidgePlot(GEX.seur.cc, features = c("Pcna", "Top2a", "Mcm6", "Mki67"), ncol = 2)

GEX.seur.cc <- RunPCA(GEX.seur.cc, features = c(s.genes, g2m.genes))
DimPlot(GEX.seur.cc, reduction = 'pca')

Markers and Clusters

Normalizing

GEX.seur <- NormalizeData(GEX.seur, normalization.method = "LogNormalize", scale.factor = 10000)
GEX.seur <- FindVariableFeatures(GEX.seur, selection.method = "vst", nfeatures = 1500)

# Identify the 10 most highly variable genes
#top10 <- head(VariableFeatures(GEX.seur), 10)
top20 <- head(VariableFeatures(GEX.seur), 20)

# plot variable features with and without labels
plot1 <- VariableFeaturePlot(GEX.seur)
plot2 <- LabelPoints(plot = plot1, points = top20, repel = T)
plot1 + plot2

head(VariableFeatures(GEX.seur), 200)
##   [1] "Wdr17"         "Mgat4c"        "Cntn5"         "Zfp804a"      
##   [5] "Klhl1"         "Gal"           "Gm30613"       "Adgrg6"       
##   [9] "Gm32647"       "Cntnap2"       "Kcnb2"         "Gm29521"      
##  [13] "Robo2"         "Brinp3"        "Ntng1"         "Cpne4"        
##  [17] "Nmu"           "Cdh8"          "Nrxn3"         "Zfp804b"      
##  [21] "Lingo2"        "Cemip"         "Gm21847"       "Kcnip4"       
##  [25] "Ano2"          "Pdzrn4"        "Pcdh9"         "Gm15261"      
##  [29] "Cdh9"          "Grm5"          "Sgcz"          "Tmeff2"       
##  [33] "Ebf1"          "AC150683.1"    "Nrg1"          "Cdh18"        
##  [37] "Ntrk3"         "Gpr149"        "Prkg1"         "Vip"          
##  [41] "Schip1"        "4930509J09Rik" "Dgkg"          "Vwc2l"        
##  [45] "Car10"         "March1"        "Cmah"          "Kcnq5"        
##  [49] "Cdh6"          "Astn2"         "Unc5d"         "Nxph2"        
##  [53] "Myh11"         "Fgf14"         "Clstn2"        "Gm38505"      
##  [57] "Rbpms"         "Asic2"         "1700063D05Rik" "Gm20754"      
##  [61] "Piezo2"        "Gpm6a"         "Dcc"           "Grin3a"       
##  [65] "Sema5a"        "Sst"           "Kcnh7"         "4930432L08Rik"
##  [69] "Gpc5"          "Gm26612"       "Zfhx3"         "Pbx3"         
##  [73] "Spock3"        "Gm15680"       "Pcdh10"        "Gm15584"      
##  [77] "Chsy3"         "Meis2"         "Dpp10"         "Fmo2"         
##  [81] "Pde1a"         "Efr3a"         "Gm30382"       "Egfem1"       
##  [85] "Serpini1"      "Cysltr2"       "Ssc4d"         "Csmd3"        
##  [89] "Gm26737"       "Ccbe1"         "4930422I22Rik" "Ttc29"        
##  [93] "Itgb6"         "Robo1"         "Fut9"          "Rasgef1b"     
##  [97] "Cpa6"          "Ano1"          "Plxna4"        "Tnr"          
## [101] "Abca9"         "Penk"          "Tbx20"         "Gm43388"      
## [105] "Chrna9"        "Csmd1"         "Pcdh11x"       "Gm20713"      
## [109] "9530059O14Rik" "Dgkb"          "Pde4d"         "Plcl1"        
## [113] "Dgki"          "1700034P13Rik" "C3"            "Myo3b"        
## [117] "Tac1"          "1700111E14Rik" "Luzp2"         "Kctd16"       
## [121] "Trhde"         "Gm32916"       "Il1rapl1"      "Col25a1"      
## [125] "Gm26632"       "D030068K23Rik" "Gm49938"       "B230323A14Rik"
## [129] "Tafa2"         "Galntl6"       "B230312C02Rik" "Arhgap6"      
## [133] "Skap1"         "Cdh19"         "Cacna2d3"      "Hpse2"        
## [137] "Trps1"         "Myl1"          "Grik1"         "Nwd1"         
## [141] "Gm19784"       "5530401A14Rik" "Gm37267"       "Adamtsl3"     
## [145] "Gm11342"       "Sox2ot"        "Calcb"         "Foxp2"        
## [149] "Shisa9"        "Kcnd2"         "Grm8"          "Rab27b"       
## [153] "Hs6st3"        "Ghr"           "Gm29771"       "Gm12239"      
## [157] "Rerg"          "Chrm3"         "Clcnka"        "9130019P16Rik"
## [161] "Kctd8"         "Cntn4"         "Col8a1"        "Nos1"         
## [165] "Gna14"         "Cux2"          "Gm13561"       "2310039L15Rik"
## [169] "Galr1"         "C7"            "Dkk2"          "Gm50255"      
## [173] "Gm28494"       "4930587E11Rik" "Casp8"         "4930471E19Rik"
## [177] "Xirp2"         "G930045G22Rik" "Slamf1"        "4931415C17Rik"
## [181] "Nxph1"         "Galnt18"       "Etl4"          "Tenm2"        
## [185] "Rarb"          "Bmp5"          "Gm30094"       "Gm20560"      
## [189] "Gm26713"       "Lama2"         "Hsd17b14"      "E130310I04Rik"
## [193] "Kif26b"        "4930445B16Rik" "Itga1"         "A330008L17Rik"
## [197] "Nell1"         "Slc4a4"        "Gm49678"       "Dock10"
GEX.seur <- ScaleData(GEX.seur, features = rownames(GEX.seur))
## Centering and scaling data matrix

PCA

# exclude MT genes  and more 

#DIG <- grep("^Tra|^Trb|^Trg|^Trd|^Tcr|^Igm|^Igh|^Igk|^Igl|Jchain|^Hsp|^Rps|^Rpl|Hbb-|Hba-|^Dnaj|^AY|^Gm|^Hist",rownames(GEX.seur),value = T)

DIG <- grep("^Tra|^Trb|^Trg|^Trd|^Tcr|^Igm|^Igh|^Igk|^Igl|Jchain|Mzb1|Vpreb|Lars2|Jun|Fos|^Hsp|^Rps|^Rpl|Hbb-|Hba-|^Dnaj|^AY|^AC|^AA|^AI|^AW|^BC|^Gm|^Hist|Rik$|-ps",
            rownames(GEX.seur),value = T)
CC_gene <- Hmisc::capitalize(tolower(as.vector(unlist(cc.genes.updated.2019))))


VariableFeatures(GEX.seur) <- setdiff(VariableFeatures(object = GEX.seur),
                                                c(MT_gene,
                                                  DIG, 
                                                  CC_gene) )

GEX.seur <- RunPCA(GEX.seur, features = VariableFeatures(GEX.seur))
## PC_ 1 
## Positive:  Ntrk3, Ano2, Robo2, Tmeff2, Cdh8, Cpne4, Nrxn3, Adgrg6, Gpr149, Clstn2 
##     Cntn5, Mgat4c, Spock3, Pdzrn4, Zfp804a, Cdh6, Dgkg, Grin3a, Pcdh10, Sgcz 
##     Myl1, Cysltr2, Astn2, Plxna4, Cux2, Itgb6, Cacna2d3, Tnr, Arhgap6, Kcnb2 
## Negative:  Lrrtm4, Galntl6, Tox, Ndst4, Grik1, Epha5, Kcnd2, Bnc2, Pcdh7, Pde3a 
##     Nos1, Kcnab1, Lrp1b, Tshz2, Cdc14a, Chrna7, Specc1, Syt6, Zfp536, Synpo2 
##     Plcxd3, Fbxw15, Lrrc4c, Dach1, Rspo2, Brinp2, Fat3, Fbxw24, Tenm3, Caln1 
## PC_ 2 
## Positive:  Egfem1, Auts2, Nos1, Schip1, Kcnq5, Asic2, Cadps2, Etv1, Cmah, Kcnab1 
##     Epha5, Alcam, Gfra1, Dach1, Kcnt2, Dgkb, Hs6st3, Ebf1, Rgs6, Lrrc4c 
##     Stxbp6, Il1rapl1, Rgs7, Kcnj3, Cdh11, Pde4d, Creb5, Fat3, Tenm3, Ltk 
## Negative:  Rbfox1, Bnc2, Ptprt, Tshz2, Tafa1, Gpc6, Grik1, Frmd4b, St6galnac3, Brinp2 
##     Fbxw15, Caln1, Fbxw24, Pcdh7, Tox, Tcf7l2, Plcxd3, Tmem132c, Cdc14a, Dock2 
##     Pld5, Specc1, Unc5c, Adamtsl1, Oprk1, Nfia, Sdk2, Ccbe1, Alk, Tmem132cos 
## PC_ 3 
## Positive:  Cdh18, Klhl1, Kcnip4, Pbx3, Csmd3, Gpc6, Kctd8, Cadm2, Htr4, Meis1 
##     C79798, March1, Dlc1, Zfhx3, Vwc2l, Skap1, Csmd1, Serpini1, Cntn3, Sema5a 
##     Galnt18, Zbbx, Gabrg3, Cdh9, Khdrbs2, Car10, Stxbp5l, Tenm2, Sybu, Plcl1 
## Negative:  Adgrg6, Sgcd, Nos1, Cysltr2, Nmu, Gpr149, Grin3a, Slc4a4, Rgs6, Itgb6 
##     Kcnab1, Dapk2, Ngfr, Ano2, Zfp804a, Cbln2, Fgf13, Ccbe1, Dgkg, Efr3a 
##     Cpne4, Nfia, Gfra1, Otof, Syt2, Pcdh10, Zfp536, Lhfpl2, Calcb, Pex7 
## PC_ 4 
## Positive:  March1, Klhl1, Sema5a, Vwc2l, Galnt18, Cdh9, Zfhx3, Pbx3, Kcnh7, Chsy3 
##     Rasgef1b, C79798, Prune2, Ntng1, Bcl2, Il1rapl1, Zbbx, Dcc, Tenm4, Lncbate1 
##     Kcnb2, Alk, Kif26b, Mgat4c, Olfr78, Ebf1, Sdk1, Bmpr1b, Sntg1, Gna14 
## Negative:  Dock10, Lingo2, Kcnip4, Ndst4, Tac1, Gda, Nxph1, Sorcs1, Csmd3, Epha5 
##     Dmd, Cntn5, Lrrtm4, Lrp1b, Prkg1, Thsd4, Cntn3, Lrrc7, Kctd8, Kcnt2 
##     Hgf, Lrrc4c, Lin7a, Grm7, Pld5, Nyap2, Fgf13, Unc5d, Ccdc60, Lama2 
## PC_ 5 
## Positive:  Trhde, Cntn4, Ebf1, Nrg1, Trps1, Ptprd, Cpa6, Csmd1, Gal, Kcnd2 
##     Zmat4, Col18a1, Npas3, Rmst, Egfem1, Chsy3, Luzp2, Kcnk13, Nkain3, Fstl4 
##     Ptprt, Shisa6, Synpr, Hs6st3, St18, Plcxd3, Nav2, Myo16, Ntng1, Adgrl2 
## Negative:  Dgkb, Kcnt2, Alcam, Thsd7b, Klhl1, Vwc2l, Lrrc4c, Prkg1, Fgf13, Mgat4c 
##     Dpp10, Khdrbs2, Rasgef1b, Cdh9, Il1rapl1, Nos1, Cdh20, Zfhx3, Pbx3, Rgs6 
##     Gucy1a2, Stxbp6, Auts2, Slc44a5, Galnt18, Vcan, Zbbx, Nmur2, C79798, Hmcn1
length(setdiff(VariableFeatures(object = GEX.seur),
                                                c(MT_gene,DIG,CC_gene)))
## [1] 1040
setdiff(VariableFeatures(object = GEX.seur),
                                                c(MT_gene,DIG,CC_gene))[1:300]
##   [1] "Wdr17"      "Mgat4c"     "Cntn5"      "Zfp804a"    "Klhl1"     
##   [6] "Gal"        "Adgrg6"     "Cntnap2"    "Kcnb2"      "Robo2"     
##  [11] "Brinp3"     "Ntng1"      "Cpne4"      "Nmu"        "Cdh8"      
##  [16] "Nrxn3"      "Zfp804b"    "Lingo2"     "Cemip"      "Kcnip4"    
##  [21] "Ano2"       "Pdzrn4"     "Pcdh9"      "Cdh9"       "Grm5"      
##  [26] "Sgcz"       "Tmeff2"     "Ebf1"       "Nrg1"       "Cdh18"     
##  [31] "Ntrk3"      "Gpr149"     "Prkg1"      "Vip"        "Schip1"    
##  [36] "Dgkg"       "Vwc2l"      "Car10"      "March1"     "Cmah"      
##  [41] "Kcnq5"      "Cdh6"       "Astn2"      "Unc5d"      "Nxph2"     
##  [46] "Myh11"      "Fgf14"      "Clstn2"     "Rbpms"      "Asic2"     
##  [51] "Piezo2"     "Gpm6a"      "Dcc"        "Grin3a"     "Sema5a"    
##  [56] "Sst"        "Kcnh7"      "Gpc5"       "Zfhx3"      "Pbx3"      
##  [61] "Spock3"     "Pcdh10"     "Chsy3"      "Meis2"      "Dpp10"     
##  [66] "Fmo2"       "Pde1a"      "Efr3a"      "Egfem1"     "Serpini1"  
##  [71] "Cysltr2"    "Ssc4d"      "Csmd3"      "Ccbe1"      "Ttc29"     
##  [76] "Itgb6"      "Robo1"      "Fut9"       "Rasgef1b"   "Cpa6"      
##  [81] "Ano1"       "Plxna4"     "Tnr"        "Abca9"      "Penk"      
##  [86] "Tbx20"      "Chrna9"     "Csmd1"      "Pcdh11x"    "Dgkb"      
##  [91] "Pde4d"      "Plcl1"      "Dgki"       "C3"         "Myo3b"     
##  [96] "Tac1"       "Luzp2"      "Kctd16"     "Trhde"      "Il1rapl1"  
## [101] "Col25a1"    "Tafa2"      "Galntl6"    "Arhgap6"    "Skap1"     
## [106] "Cdh19"      "Cacna2d3"   "Hpse2"      "Trps1"      "Myl1"      
## [111] "Grik1"      "Nwd1"       "Adamtsl3"   "Sox2ot"     "Calcb"     
## [116] "Foxp2"      "Shisa9"     "Kcnd2"      "Grm8"       "Rab27b"    
## [121] "Hs6st3"     "Ghr"        "Rerg"       "Chrm3"      "Clcnka"    
## [126] "Kctd8"      "Cntn4"      "Col8a1"     "Nos1"       "Gna14"     
## [131] "Cux2"       "Galr1"      "C7"         "Dkk2"       "Casp8"     
## [136] "Xirp2"      "Slamf1"     "Nxph1"      "Galnt18"    "Etl4"      
## [141] "Tenm2"      "Rarb"       "Bmp5"       "Lama2"      "Hsd17b14"  
## [146] "Kif26b"     "Itga1"      "Nell1"      "Slc4a4"     "Dock10"    
## [151] "Kirrel3"    "Arhgap15os" "Zfp286os"   "Mecom"      "Adamts9"   
## [156] "Apol7b"     "Bmpr1b"     "Cntn3"      "Tenm4"      "Cadm2"     
## [161] "Actg2"      "Synpr"      "Cdh10"      "Ano5"       "Ccnb1"     
## [166] "Masp1"      "Flrt2"      "Atp12a"     "Gabrg3"     "C79798"    
## [171] "Mid1"       "Adam2"      "Cbln2"      "Nox3"       "Fam81b"    
## [176] "Ngfr"       "Slc35f1"    "Arhgap15"   "Olfr78"     "Dcn"       
## [181] "Adamts6"    "Alcam"      "Muc16"      "Grm7"       "Nell1os"   
## [186] "Npas3"      "Met"        "Cadps2"     "Fam47e"     "Rbfox1"    
## [191] "Lrrc4c"     "Fbxl7"      "Thsd4"      "Tafa1"      "Hccs"      
## [196] "Otof"       "Zbbx"       "Galnt13"    "Pdgfra"     "Ephb1"     
## [201] "Ccdc3"      "Loxl1"      "Ror2"       "Cep72"      "Grp"       
## [206] "Sctr"       "Gpc6"       "Ptprt"      "Thsd7b"     "Ror1"      
## [211] "Iigp1"      "Slc22a20"   "Mir99ahg"   "Iqgap2"     "Hcn1"      
## [216] "Plxdc2"     "Cdhr1"      "Mme"        "Dapk2"      "Eya1"      
## [221] "Adgrl2"     "Smtn"       "Prr16"      "Calcrl"     "Scn7a"     
## [226] "Serpine2"   "Sulf1"      "Rasgrf1"    "Sorcs1"     "Kng1"      
## [231] "Scgn"       "L3mbtl4"    "Antxr2"     "Bnc2"       "Acp7"      
## [236] "Adap2os"    "Dach1"      "Chst9"      "Lpp"        "Foxa3"     
## [241] "Hormad1"    "Hsd17b3"    "Scnn1a"     "Kit"        "Wee2"      
## [246] "Ntrk2"      "St8sia2"    "Hs3st4"     "Pigk"       "Necab1"    
## [251] "Ptprd"      "Tex14"      "Auts2"      "Cpne8"      "Mast4"     
## [256] "Sorcs3"     "Tex21"      "Spag16"     "Esrrb"      "Ggt5"      
## [261] "Igfbp5"     "Cartpt"     "Tenm1"      "Lhfpl2"     "Agtr1b"    
## [266] "Pde4c"      "Bcl2"       "Htr4"       "Slc13a1"    "Aldh1a2"   
## [271] "Plcb1"      "Vcan"       "Sntg2"      "Dmd"        "Klra17"    
## [276] "Stxbp6"     "Serpinb5"   "Rmst"       "Abcb5"      "Inhbc"     
## [281] "C1qtnf7"    "Cavin2"     "Moxd1"      "Esr1"       "Col18a1"   
## [286] "Prune2"     "Rgs6"       "Prkca"      "Brinp2"     "Gda"       
## [291] "Qrfpr"      "Abca8a"     "Rtl4"       "Nectin4"    "Plcb2"     
## [296] "Cga"        "Akr1d1"     "Ppp1r3b"    "Itgb2"      "Ovol2"
DimPlot(GEX.seur, reduction = "pca",dims = 1:2, group.by = "orig.ident") +
  DimPlot(GEX.seur, reduction = "pca",dims = 3:4, group.by = "orig.ident")

DimPlot(GEX.seur, reduction = "pca",dims = 1:2, group.by = "FB.info", cols = color.FB) +
  DimPlot(GEX.seur, reduction = "pca",dims = 3:4, group.by = "FB.info", cols = color.FB)

DimHeatmap(GEX.seur, dims = 1:12, cells = 1500, balanced = TRUE,ncol = 4)

decide PCs to use
ElbowPlot(GEX.seur,ndims = 50)

PCs <- 1:25
GEX.seur <- FindNeighbors(GEX.seur, dims = PCs, k.param = 20)
## Computing nearest neighbor graph
## Computing SNN
GEX.seur <- FindClusters(GEX.seur, method = 'igraph' ,resolution = 2.4)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 13503
## Number of edges: 564169
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8056
## Number of communities: 32
## Elapsed time: 2 seconds

Run UMAP/tSNE

GEX.seur <- RunTSNE(GEX.seur, dims=PCs, complexity = 100)
GEX.seur <- RunUMAP(GEX.seur, dims=PCs, n.neighbors = 20, seed.use = 145)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 23:42:42 UMAP embedding parameters a = 0.9922 b = 1.112
## 23:42:42 Read 13503 rows and found 25 numeric columns
## 23:42:42 Using Annoy for neighbor search, n_neighbors = 20
## 23:42:42 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 23:42:43 Writing NN index file to temp file C:\Users\Shaorui\AppData\Local\Temp\Rtmp0UT5ed\file29245fd940c
## 23:42:43 Searching Annoy index using 1 thread, search_k = 2000
## 23:42:46 Annoy recall = 100%
## 23:42:47 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 20
## 23:42:48 Initializing from normalized Laplacian + noise (using irlba)
## 23:42:49 Commencing optimization for 200 epochs, with 404298 positive edges
## 23:43:02 Optimization finished
DimPlot(GEX.seur, reduction = "tsne", label = T) + DimPlot(GEX.seur, reduction = "umap", label = T)

FeaturePlot(GEX.seur, reduction = "umap", features = c("nFeature_RNA","nCount_RNA","percent.mt","percent.rb"))

DimPlot(GEX.seur, label = F, pt.size = 0.05, repel = F, reduction = 'umap', group.by = "FB.info", split.by = "FB.info", 
        ncol = 3, cols = color.FB)

#DimPlot(GEX.seur, label = F, pt.size = 0.05, repel = F, reduction = 'umap', group.by = "FB.info", split.by = "FB.info", 
#        ncol = 3, cols = color.FB)
GEX.seur$cnt <- factor(gsub(".1$|.2$|.3$","",as.character(GEX.seur$FB.info)),
                       levels = c("CTL",
                                  "CKO"))
color.cnt <- color.FB[c(1,4)]
DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "seurat_clusters") +
  DimPlot(GEX.seur, reduction = "umap", label = F, group.by = "cnt", cols = color.cnt)

markers.new.ss <- list(EMN=c("Chat","Bnc2","Tox","Ptprt",
                       "Gfra2","Oprk1","Adamtsl1", 
                       "Fbxw15","Fbxw24","Chrna7",
                       "Satb1","Itga6","Cntnap5b",
                       "Pgm5","Chgb",
                       "Nxph1",
                       "Gabrb1","Glp2r","Nebl",
                       "Lrrc7",
                       "Ryr3","Eda",
                       "Hgf","Lama2","Efnb2",
                       "Tac1",
                       "Kctd8",
                       "Ptn",
                       "Ntrk2","Penk","Itgb8",
                       "Fut9","Nfatc1","Egfr","Pdia5",
                       "Ahr","Mgll",
                       "Aff3",
                       "Chrm3"
                       ),
                 IMN=c("Nos1","Kcnab1",
                       "Gfra1","Etv1",
                       "Man1a","Airn",
                       "Adcy2",
                       "Col25a1",
                       "Cmah","Creb5","Vip","Pde1a",
                       "Ebf1","Gpc5","Mid1","Igfbp5",
                       "Ppara",
                       "Pcdh11x","Adcy8","Grp"
                       ),
                 IN=c("Npas3","Synpr","St18","Gal",
                      "Nova1",
                      "Cdh10","Kcnk13",
                      "Neurod6","Moxd1","Sctr",
                      "Piezo1","Vipr2","Adamts9","Sst","Kcnn2"
                      ),
                 IPAN=c("Calb2","Adcy1","Calcb","Nmu","Adgrg6","Pcdh10",
                        "Ngfr","Galr1","Il7","Aff2",
                        "Gpr149","Itgb6","Met","Itgbl1",
                        
                        "Cdh6","Cdh8",
                        "Clstn2","Ano2","Ntrk3",
                        "Cpne4","Vwc2l","Cdh9",
                        "Car10","Dcc",
                        "Scgn",
                        "Vcan","Cck","Piezo2","Kcnh7",
                        "Rerg","Bmpr1b","Skap1","Ntng1",
                        "Tafa2","Nxph2"),
                 CONTAM=c("Actl6b","Phox2b","Sox10","Plp1","L1cam","Gfap","Rxrg","Acta2","Actg2","Epcam","Pecam1","Ptprc"),
                 pDEGs=c("Grid2","Ide","Btaf1","Slc15a2","Ccser1","Hdac9","Rspo2","Grem2"))

#
pm.All_new.s1 <- DotPlot(GEX.seur, features = as.vector(unlist(markers.new.ss)), group.by = "seurat_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev) #+ labs(title="All - newAnno1")
pm.All_new.s1

DoubletFinder

library(DoubletFinder)
for(i in 1:length(sweep.res.list)){
  if(length(sweep.res.list[[i]]$pANN[is.nan(sweep.res.list[[i]]$pANN)]) !=0){
    if(i!=1){
      sweep.res.list[[i]] <- sweep.res.list[[i-1]]
    }else(
      sweep.res.list[[i]] <- sweep.res.list[[i+1]]
    )
  }
}
sweep.stats <- summarizeSweep(sweep.res.list, GT=FALSE)
bcmvn <- find.pK(sweep.stats)

## NULL
pk_v <- as.numeric(as.character(bcmvn$pK))
pk_good <- pk_v[bcmvn$BCmetric==max(bcmvn$BCmetric)]
# specify expected doublet number     
nExp_poi <- round(0.05*length(colnames(GEX.seur)))

GEX.seur <- doubletFinder_v3(GEX.seur, PCs = PCs, pN = 0.25, pK = pk_good, 
                             nExp = nExp_poi, reuse.pANN = FALSE, sct = FALSE)
## [1] "Creating 4501 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
colnames(GEX.seur@meta.data)[ncol(GEX.seur@meta.data)] <- "DoubletFinder0.05"
# specify expected doublet number     
nExp_poi <- round(0.1*length(colnames(GEX.seur)))

GEX.seur <- doubletFinder_v3(GEX.seur, PCs = PCs, pN = 0.25, pK = pk_good, 
                             nExp = nExp_poi, reuse.pANN = FALSE, sct = FALSE)
## [1] "Creating 4501 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
colnames(GEX.seur@meta.data)[ncol(GEX.seur@meta.data)] <- "DoubletFinder0.1"
DimPlot(GEX.seur, reduction = "umap", group.by = "DoubletFinder0.05") +
  DimPlot(GEX.seur, reduction = "umap", group.by = "DoubletFinder0.1")

sort_clusters

# sort_clusters
GEX.seur$sort_clusters <- factor(as.character(GEX.seur$seurat_clusters),
                                 levels = c(3,0,7,10,8, 12, 4, 11, 17,    # EMN
                                            2,5,1, 14,23, 16, 22,       # IMN
                                            9, 18, 24,             # IN
                                            6,13, 19,15, 26, 20,      # IPAN
                                            
                                            21,30,28,31,29,    # Mix
                                            27,      # Glia
                                            25       # SMC
                                            ))


# preAnno1            
GEX.seur$preAnno1 <- as.character(GEX.seur$seurat_clusters)

GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(3,0,7,10,8)] <- "EMN1"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(12)] <- "EMN2"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(4)] <- "EMN3"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(11)] <- "EMN4"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(17)] <- "EMN5"

GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(2,5,1)] <- "IMN1"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(14,23)] <- "IMN2"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(16)] <- "IMN3"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(22)] <- "IMN4"

GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(9)] <- "IN1"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(18)] <- "IN2"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(24)] <- "IN3"

GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(6,13)] <- "IPAN1"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(19,15)] <- "IPAN2"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(26)] <- "IPAN3"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(20)] <- "IPAN4"

GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(21,30,28,31,29)] <- "MIX"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(27)] <- "Glia"
GEX.seur$preAnno1[GEX.seur$preAnno1 %in% c(25)] <- "SMC"

GEX.seur$preAnno1 <- factor(GEX.seur$preAnno1,
                            levels = c(paste0("EMN",1:5),
                                       paste0("IMN",1:4),
                                       paste0("IN",1:3),
                                       paste0("IPAN",1:4),
                                       "MIX","Glia","SMC"))


# preAnno2            
GEX.seur$preAnno2 <- as.character(GEX.seur$seurat_clusters)

GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(3,0,7,10,8)] <- "EMN1"
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(12)] <- "EMN2"
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(4)] <- "EMN3"
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(11)] <- "EMN4"
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(17)] <- "EMN5"

GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(2,5,1)] <- "IMN1"
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(14,23)] <- "IMN2"
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(16)] <- "IMN3"
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(22)] <- "IMN4"

GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(9)] <- "IN1"
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(18)] <- "IN2"
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(24)] <- "IN3"

GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(6)] <- "IPAN1.1"
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(13)] <- "IPAN1.2"
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(19)] <- "IPAN2.1"
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(15)] <- "IPAN2.2"
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(26)] <- "IPAN3"
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(20)] <- "IPAN4"

GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(21,30,28,31,29)] <- "MIX"
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(27)] <- "Glia"
GEX.seur$preAnno2[GEX.seur$preAnno2 %in% c(25)] <- "SMC"

GEX.seur$preAnno2 <- factor(GEX.seur$preAnno2,
                            levels = c(paste0("EMN",1:5),
                                       paste0("IMN",1:4),
                                       paste0("IN",1:3),
                                       paste0("IPAN",c(1.1,1.2,2.1,2.2,3,4)),
                                       "MIX","Glia","SMC"))
DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "preAnno1") +
  DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "preAnno2")

markers.new.ss <- list(EMN=c("Chat","Bnc2","Tox","Ptprt",
                       "Gfra2","Oprk1","Adamtsl1", 
                       "Fbxw15","Fbxw24","Chrna7",
                       "Satb1","Itga6","Cntnap5b",
                       "Pgm5","Chgb",
                       "Nxph1",
                       "Gabrb1","Glp2r","Nebl",
                       "Lrrc7",
                       "Ryr3","Eda",
                       "Hgf","Lama2","Efnb2",
                       "Tac1",
                       "Kctd8",
                       "Ptn",
                       "Ntrk2","Penk","Itgb8",
                       "Fut9","Nfatc1","Egfr","Pdia5",
                       "Ahr","Mgll",
                       "Aff3",
                       "Chrm3"
                       ),
                 IMN=c("Nos1","Kcnab1",
                       "Gfra1","Etv1",
                       "Man1a","Airn",
                       "Adcy2",
                       "Col25a1",
                       "Cmah","Creb5","Vip","Pde1a",
                       "Ebf1","Gpc5","Mid1","Igfbp5",
                       "Ppara",
                       "Pcdh11x","Adcy8","Grp"
                       ),
                 IN=c("Npas3","Synpr","St18","Gal",
                      "Nova1",
                      "Cdh10","Kcnk13",
                      "Neurod6","Moxd1","Sctr",
                      "Piezo1","Vipr2","Adamts9","Sst","Kcnn2"
                      ),
                 IPAN=c("Calb2","Adcy1","Calcb","Nmu","Adgrg6","Pcdh10",
                        "Ngfr","Galr1","Il7","Aff2",
                        "Gpr149","Itgb6","Met","Itgbl1",
                        
                        "Cdh6","Cdh8",
                        "Clstn2","Ano2","Ntrk3",
                        "Cpne4","Vwc2l","Cdh9",
                        "Car10","Dcc",
                        "Scgn",
                        "Vcan","Cck","Piezo2","Kcnh7",
                        "Rerg","Bmpr1b","Skap1","Ntng1",
                        "Tafa2","Nxph2"),
                 CONTAM=c("Actl6b","Phox2b","Sox10","Plp1","L1cam","Gfap","Rxrg","Acta2","Actg2","Epcam","Pecam1","Ptprc"),
                 pDEGs=c("Grid2","Ide","Btaf1","Slc15a2","Ccser1","Hdac9","Rspo2","Grem2"))

#
pm.All_new.s2 <- DotPlot(GEX.seur, features = as.vector(unlist(markers.new.ss)), group.by = "sort_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev) #+ labs(title="All - newAnno1")
pm.All_new.s2

markers.new.ss <- list(EMN=c("Chat","Bnc2","Tox","Ptprt",
                       "Gfra2","Oprk1","Adamtsl1", 
                       "Fbxw15","Fbxw24","Chrna7",
                       "Satb1","Itga6","Cntnap5b",
                       "Pgm5","Chgb",
                       "Nxph1",
                       "Gabrb1","Glp2r","Nebl",
                       "Lrrc7",
                       "Ryr3","Eda",
                       "Hgf","Lama2","Efnb2",
                       "Tac1",
                       "Kctd8",
                       "Ptn",
                       "Ntrk2","Penk","Itgb8",
                       "Fut9","Nfatc1","Egfr","Pdia5",
                       "Ahr","Mgll",
                       "Aff3",
                       "Chrm3"
                       ),
                 IMN=c("Nos1","Kcnab1",
                       "Gfra1","Etv1",
                       "Man1a","Airn",
                       "Adcy2",
                       "Col25a1",
                       "Cmah","Creb5","Vip","Pde1a",
                       "Ebf1","Gpc5","Mid1","Igfbp5",
                       "Ppara",
                       "Pcdh11x","Adcy8","Grp"
                       ),
                 IN=c("Npas3","Synpr","St18","Gal",
                      "Nova1",
                      "Cdh10","Kcnk13",
                      "Neurod6","Moxd1","Sctr",
                      "Piezo1","Vipr2","Adamts9","Sst","Kcnn2"
                      ),
                 IPAN=c("Calb2","Adcy1","Calcb","Nmu","Adgrg6","Pcdh10",
                        "Ngfr","Galr1","Il7","Aff2",
                        "Gpr149","Itgb6","Met","Itgbl1",
                        
                        "Cdh6","Cdh8",
                        "Clstn2","Ano2","Ntrk3",
                        "Cpne4","Vwc2l","Cdh9",
                        "Car10","Dcc",
                        "Scgn",
                        "Vcan","Cck","Piezo2","Kcnh7",
                        "Rerg","Bmpr1b","Skap1","Ntng1",
                        "Tafa2","Nxph2"),
                 CONTAM=c("Actl6b","Phox2b"#,"Sox10","Plp1","L1cam","Gfap","Rxrg","Acta2","Actg2","Epcam","Pecam1","Ptprc"
                          ),
                 pDEGs=c("Grid2","Ide","Btaf1","Slc15a2","Ccser1","Hdac9","Rspo2","Grem2"))

#
pm.All_new.p1 <- DotPlot(GEX.seur, features = as.vector(unlist(markers.new.ss)), group.by = "preAnno1",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev) #+ labs(title="All - newAnno1")
pm.All_new.p1

pm.All_new.p2 <- DotPlot(GEX.seur, features = as.vector(unlist(markers.new.ss)), group.by = "preAnno2",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev) #+ labs(title="All - newAnno1")
pm.All_new.p2

#saveRDS(GEX.seur, "GEX0429.seur.preAnno.rds")
DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "preAnno1") +
  DimPlot(GEX.seur, reduction = "umap", label = F, group.by = "cnt", cols = color.cnt)

pure

GEX.pure <- subset(GEX.seur, subset = preAnno1 %in% levels(GEX.seur$preAnno1)[1:16] & DoubletFinder0.1 == "Singlet")
GEX.pure
## An object of class Seurat 
## 22951 features across 11932 samples within 1 assay 
## Active assay: RNA (22951 features, 1040 variable features)
##  3 dimensional reductions calculated: pca, tsne, umap
DimPlot(GEX.pure, reduction = "umap", label = T, group.by = "preAnno1") +
  DimPlot(GEX.pure, reduction = "umap", label = T, group.by = "preAnno2")

DimPlot(GEX.pure, reduction = "umap", label = T, group.by = "preAnno1") +
  DimPlot(GEX.pure, reduction = "umap", label = F, group.by = "cnt", cols = color.cnt)

DimPlot(GEX.pure, label = F, pt.size = 0.05, repel = F, reduction = 'umap', group.by = "FB.info", split.by = "FB.info", 
        ncol = 3, cols = color.FB)

#DimPlot(GEX.seur, label = F, pt.size = 0.05, repel = F, reduction = 'umap', group.by = "FB.info", split.by = "FB.info", 
#        ncol = 3, cols = color.FB)